View Javadoc

1   
2   /*
3    * SmartCrawler
4    *
5    * $Id: LinksProvider.java,v 1.4 2005/07/08 12:09:08 vincool Exp $
6    * Copyright 2005 Davide Pozza
7    *
8    * This program is free software; you can redistribute it
9    * and/or modify it under the terms of the GNU General Public
10   * License as published by the Free Software Foundation;
11   * either version 2 of the License, or (at your option) any
12   * later version.
13   *
14   * This program is distributed in the hope that it will be
15   * useful, but WITHOUT ANY WARRANTY; without even the implied
16   * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
17   * PURPOSE. See the GNU General Public License for more
18   * details.
19   *
20   * You should have received a copy of the GNU General Public
21   * License along with this program; if not, write to the Free
22   * Software Foundation, Inc., 59 Temple Place, Suite 330,
23   * Boston, MA 02111-1307 USA
24   *
25   */
26  
27  package org.smartcrawler.common;
28  
29  import java.util.Hashtable;
30  import org.apache.log4j.Logger;
31  
32  
33  
34  /***
35   *
36   *
37   * @author <a href="mailto:pozzad@alice.it">Davide Pozza</a>
38   * @version <tt>$Revision: 1.4 $</tt>
39   */
40  public class LinksProvider implements Provider {
41  
42      private static LinksProvider instance;
43      private Queue queue;
44      private Hashtable retrievedLinks;
45      private Hashtable toBeConfirmedLinks;
46      private int size;
47      private static Logger log = SCLogger.getLogger(LinksProvider.class);
48      private static Logger logProv = SCLogger.getProviderLogger();
49  
50      /*** Creates a new instance of LinksQueue */
51      private LinksProvider() {
52          queue = new SimpleQueue();
53          retrievedLinks = new Hashtable();
54          toBeConfirmedLinks = new Hashtable();
55      }
56  
57      /***
58       *
59       * @return
60       */
61      public synchronized static LinksProvider instance() {
62          if (instance == null) {
63              instance = new LinksProvider();
64          }
65          return instance;
66      }
67  
68      /***
69       *
70       * @return
71       */
72      public synchronized Link next() {
73          log.debug("next(): BEGIN");
74          while (toBeConfirmedLinks.size() > 0 && size() == 0) {
75              try {
76                  wait();
77              } catch(Exception e) {}
78          }
79          Link next = null;
80          if (toBeConfirmedLinks.size() == 0 && size() == 0) {
81              log.warn("next(): Bad call: the provider is empty");
82          } else {
83              //get next link, register it as processed and remove it
84              //from the queue
85              log.debug("next(): queue size is " + queue.size());
86  
87              next = (Link)queue.get();
88              log.debug("next(): retrieving " + next);
89              toBeConfirmedLinks.put(next.toString(), "");
90          }
91          log.debug("next(): END");
92          logProv.info("provided: " + next + " queue size: " + queue.size()
93              + " retrieved TOT.:" + retrievedLinks.size()
94              + " to be confirmed TOT.:" + toBeConfirmedLinks.size());
95          return next;
96      }
97  
98      /***
99       *
100      * @param link
101      */
102     public synchronized void store(Link link) {
103         log.debug("store(): BEGIN");
104         //if already exists the file, ignore the put
105         String key = link.toString();
106         if (key != null &&
107                 !retrievedLinks.containsKey(key) &&
108                 !toBeConfirmedLinks.containsKey(key) &&
109                 !queue.contains(link)) {
110             log.debug("store(): storing " + link);
111             queue.put(link);
112             notify();
113             logProv.info("stored: " + link + " queue size: " + queue.size()
114             + " retrieved TOT.:" + retrievedLinks.size()
115             + " to be confirmed TOT.:" + toBeConfirmedLinks.size());
116         }
117         log.debug("store(): END");
118     }
119 
120     /***
121      *
122      * @param link
123      */
124     public synchronized void confirm(Link link) {
125         log.debug("confirm(): BEGIN");
126         //if already exists the file, ignore the put
127         retrievedLinks.put(link.toString(), "");
128         toBeConfirmedLinks.remove(link.toString());
129         logProv.info("confirmed: " + link + " queue size: " + queue.size()
130             + " retrieved TOT.:" + retrievedLinks.size()
131             + " to be confirmed TOT.:" + toBeConfirmedLinks.size());
132         log.debug("confirm(): END");
133     }
134 
135     /***
136      *
137      * @return
138      */
139     public synchronized int size(){
140         return queue.size();
141     }
142 
143     /***
144      *
145      * @return
146      */
147     public synchronized boolean isEmpty() {
148         return (size() == 0 && toBeConfirmedLinks.size() == 0);
149     }
150 }